{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "908da31e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Two disabled ways to fetch the Fhrozen/FSD50k dataset repo into ./FSD50k:\n",
    "# the huggingface_hub Python API, or the equivalent huggingface-cli command.\n",
    "# NOTE(review): presumably a one-time download that was already done; the final\n",
    "# cleanup cell removes ./FSD50k, so re-enable one of these before a fresh run.\n",
    "# from huggingface_hub import snapshot_download\n",
    "\n",
    "# snapshot_download(\n",
    "#     repo_id=\"Fhrozen/FSD50k\",\n",
    "#     repo_type=\"dataset\",\n",
    "#     local_dir = './FSD50k', max_workers = 30,\n",
    "# )\n",
    "# !huggingface-cli download Fhrozen/FSD50k --local-dir=\"./FSD50k\" --max-workers=\"30\" --repo-type=dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "f40b52e5",
   "metadata": {},
   "outputs": [],
   "source": [
    "import librosa\n",
    "import soundfile as sf\n",
    "import pandas as pd\n",
    "import os\n",
    "import json\n",
    "from glob import glob\n",
    "from tqdm import tqdm\n",
    "from multiprocess import Pool\n",
    "import itertools\n",
    "import soundfile as sf\n",
    "\n",
    "def chunks(l, n):\n",
    "    for i in range(0, len(l), n):\n",
    "        yield (l[i: i + n], i // n)\n",
    "\n",
    "def multiprocessing(strings, function, cores=6, returned=True):\n",
    "    df_split = chunks(strings, len(strings) // cores)\n",
    "    pool = Pool(cores)\n",
    "    pooled = pool.map(function, df_split)\n",
    "    pool.close()\n",
    "    pool.join()\n",
    "\n",
    "    if returned:\n",
    "        return list(itertools.chain(*pooled))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "e7cfcd35",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "mkdir: cannot create directory ‘FSD50k-audio’: File exists\r\n"
     ]
    }
   ],
   "source": [
    "# -p makes this cell safe to re-run: no error when the directory already exists.\n",
    "!mkdir -p FSD50k-audio"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "id": "9f98275d",
   "metadata": {},
   "outputs": [],
   "source": [
    "def loop(files):\n",
    "    \"\"\"Load one chunk of audio files at 16 kHz and write each one as MP3.\n",
    "\n",
    "    Args:\n",
    "        files: `(paths, chunk_index)` tuple as produced by `chunks`; only the\n",
    "            paths are used.\n",
    "\n",
    "    Returns:\n",
    "        List of the output paths written, in input order. Returning a list\n",
    "        instead of None lets `multiprocessing(..., returned=True)` flatten\n",
    "        the per-chunk results without raising.\n",
    "    \"\"\"\n",
    "    files, _ = files\n",
    "    written = []\n",
    "    for f in tqdm(files):\n",
    "        # librosa resamples to the requested rate while loading.\n",
    "        y, sr = librosa.load(f, sr = 16000)\n",
    "        # Flatten the source path into one file name inside FSD50k-audio,\n",
    "        # e.g. FSD50k/clips/dev/1.wav -> FSD50k-audio/FSD50k_clips_dev_1.mp3.\n",
    "        new_f = os.path.join('FSD50k-audio', f.replace('/', '_').replace('.wav', '.mp3'))\n",
    "        # soundfile picks the output format from the file extension.\n",
    "        sf.write(new_f, y, sr)\n",
    "        written.append(new_f)\n",
    "    return written"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "5a8ac65b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "51197"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Collect every .wav clip exactly one directory below FSD50k/clips.\n",
    "audio_files = glob('FSD50k/clips/*/*.wav')\n",
    "# Trailing expression: display how many clips were found.\n",
    "len(audio_files)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "id": "ac072bac",
   "metadata": {},
   "outputs": [],
   "source": [
    "# multiprocessing(audio_files, loop, cores = 20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "id": "bb010014",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████████████████████████████████████████████████████████████████████████| 40966/40966 [00:02<00:00, 15586.31it/s]\n"
     ]
    }
   ],
   "source": [
    "# Start the record list and add one entry per dev-split row whose converted MP3 exists.\n",
    "data = []\n",
    "\n",
    "df = pd.read_csv('FSD50k/labels/dev.csv')\n",
    "for position in tqdm(range(len(df))):\n",
    "    row = df.iloc[position]\n",
    "    wav_path = f\"FSD50k/clips/dev/{row['fname']}.wav\"\n",
    "    # Same flattened file name that the conversion step writes into FSD50k-audio.\n",
    "    new_f = os.path.join('FSD50k-audio', wav_path.replace('/', '_').replace('.wav', '.mp3'))\n",
    "    if os.path.exists(new_f):\n",
    "        data.append({\n",
    "            'answer': row['labels'],\n",
    "            'audio_filename': new_f,\n",
    "            'metadata': json.dumps(row.to_dict()),\n",
    "        })"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "id": "b1a043d1",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████████████████████████████████████████████████████████████████████████| 10231/10231 [00:00<00:00, 15799.14it/s]\n"
     ]
    }
   ],
   "source": [
    "# Append one entry per eval-split row whose converted MP3 exists.\n",
    "# Relies on `data` already existing; running this cell twice adds the rows twice.\n",
    "df = pd.read_csv('FSD50k/labels/eval.csv')\n",
    "for position in tqdm(range(len(df))):\n",
    "    row = df.iloc[position]\n",
    "    wav_path = f\"FSD50k/clips/eval/{row['fname']}.wav\"\n",
    "    # Same flattened file name that the conversion step writes into FSD50k-audio.\n",
    "    new_f = os.path.join('FSD50k-audio', wav_path.replace('/', '_').replace('.wav', '.mp3'))\n",
    "    if os.path.exists(new_f):\n",
    "        data.append({\n",
    "            'answer': row['labels'],\n",
    "            'audio_filename': new_f,\n",
    "            'metadata': json.dumps(row.to_dict()),\n",
    "        })"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "id": "38a6401b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "51197"
      ]
     },
     "execution_count": 69,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of records gathered so far (label rows whose MP3 exists).\n",
    "len(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "id": "d4f9204c",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Collect the distinct label names found in any record's comma-separated answer.\n",
    "label_set = set()\n",
    "for d in data:\n",
    "    label_set.update(d['answer'].split(','))\n",
    "\n",
    "# Sort before rendering: the iteration order of a set of strings changes between\n",
    "# interpreter runs, which made the generated prompt text differ on every execution.\n",
    "# `labels` ends up as the string form of the sorted list.\n",
    "labels = str(sorted(label_set))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "id": "2ddd9928",
   "metadata": {},
   "outputs": [],
   "source": [
    "import random\n",
    "\n",
    "# Prompt templates; `{labels}` is filled with the label vocabulary string.\n",
    "questions = [\n",
    "    'given the labels\\n{labels}\\n\\nclassify the audio multilabels',\n",
    "    'what are the multilabels for audio\\n\\nthe labels: {labels}'\n",
    "]\n",
    "\n",
    "# Dedicated seeded generator so every run assigns the same template to the same\n",
    "# record; the unseeded module-level random.choice was not reproducible.\n",
    "rng = random.Random(0)\n",
    "for d in data:\n",
    "    d['question'] = rng.choice(questions).format(labels = labels)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "id": "48d1a2e0",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'answer': 'Electric_guitar,Guitar,Plucked_string_instrument,Musical_instrument,Music',\n",
       " 'audio_filename': 'FSD50k-audio/FSD50k_clips_dev_64760.mp3',\n",
       " 'metadata': '{\"fname\": 64760, \"labels\": \"Electric_guitar,Guitar,Plucked_string_instrument,Musical_instrument,Music\", \"mids\": \"/m/02sgy,/m/0342h,/m/0fx80y,/m/04szw,/m/04rlf\", \"split\": \"train\"}',\n",
       " 'question': \"given the labels\\n['Crying_and_sobbing', 'Cat', 'Raindrop', 'Gull_and_seagull', 'Hiss', 'Truck', 'Human_voice', 'Cheering', 'Speech_synthesizer', 'Cupboard_open_or_close', 'Speech', 'Wild_animals', 'Thunderstorm', 'Child_speech_and_kid_speaking', 'Typewriter', 'Train', 'Cymbal', 'Crackle', 'Plucked_string_instrument', 'Bicycle_bell', 'Cricket', 'Crushing', 'Splash_and_splatter', 'Stream', 'Fart', 'Mechanical_fan', 'Bicycle', 'Fireworks', 'Screech', 'Traffic_noise_and_roadway_noise', 'Race_car_and_auto_racing', 'Marimba_and_xylophone', 'Drip', 'Wind_instrument_and_woodwind_instrument', 'Burping_and_eructation', 'Acoustic_guitar', 'Computer_keyboard', 'Sigh', 'Hands', 'Chicken_and_rooster', 'Shatter', 'Alarm', 'Bass_drum', 'Drill', 'Bird_vocalization_and_bird_call_and_bird_song', 'Chuckle_and_chortle', 'Dog', 'Fill_(with_liquid)', 'Idling', 'Subway_and_metro_and_underground', 'Pour', 'Fire', 'Siren', 'Crowd', 'Drawer_open_or_close', 'Vehicle_horn_and_car_horn_and_honking', 'Boat_and_Water_vehicle', 'Aircraft', 'Conversation', 'Brass_instrument', 'Singing', 'Tearing', 'Telephone', 'Bowed_string_instrument', 'Clapping', 'Boiling', 'Typing', 'Female_speech_and_woman_speaking', 'Cough', 'Frog', 'Breathing', 'Skateboard', 'Sliding_door', 'Tools', 'Walk_and_footsteps', 'Keys_jangling', 'Boom', 'Musical_instrument', 'Chirp_and_tweet', 'Buzz', 'Chewing_and_mastication', 'Gunshot_and_gunfire', 'Screaming', 'Animal', 'Insect', 'Fowl', 'Clock', 'Chime', 'Sawing', 'Strum', 'Crack', 'Giggle', 'Accelerating_and_revving_and_vroom', 'Human_group_actions', 'Crow', 'Power_tool', 'Slam', 'Doorbell', 'Wind_chime', 'Harmonica', 'Rattle_(instrument)', 'Sneeze', 'Keyboard_(musical)', 'Car', 'Trumpet', 'Dishes_and_pots_and_pans', 'Explosion', 'Toilet_flush', 'Trickle_and_dribble', 'Electric_guitar', 'Scissors', 'Finger_snapping', 'Bathtub_(filling_or_washing)', 'Glockenspiel', 'Laughter', 'Ocean', 'Engine_starting', 'Engine', 'Wind', 'Drum', 'Music', 
'Zipper_(clothing)', 'Harp', 'Thump_and_thud', 'Fixed-wing_aircraft_and_airplane', 'Crumpling_and_crinkling', 'Mechanisms', 'Church_bell', 'Respiratory_sounds', 'Rain', 'Scratching_(performance_technique)', 'Tick-tock', 'Crash_cymbal', 'Packing_tape_and_duct_tape', 'Gasp', 'Applause', 'Whoosh_and_swoosh_and_swish', 'Purr', 'Rattle', 'Female_singing', 'Tambourine', 'Gurgling', 'Guitar', 'Meow', 'Wood', 'Motor_vehicle_(road)', 'Ratchet_and_pawl', 'Printer', 'Run', 'Rail_transport', 'Domestic_animals_and_pets', 'Water_tap_and_faucet', 'Writing', 'Yell', 'Glass', 'Whispering', 'Male_singing', 'Bird', 'Accordion', 'Bell', 'Chatter', 'Ringtone', 'Thunder', 'Shout', 'Coin_(dropping)', 'Tabla', 'Camera', 'Motorcycle', 'Knock', 'Drum_kit', 'Water', 'Bass_guitar', 'Growling', 'Hi-hat', 'Cutlery_and_silverware', 'Gong', 'Domestic_sounds_and_home_sounds', 'Snare_drum', 'Sink_(filling_or_washing)', 'Tap', 'Piano', 'Car_passing_by', 'Door', 'Tick', 'Organ', 'Frying_(food)', 'Bus', 'Bark', 'Vehicle', 'Microwave_oven', 'Waves_and_surf', 'Livestock_and_farm_animals_and_working_animals', 'Cowbell', 'Male_speech_and_man_speaking', 'Squeak', 'Hammer', 'Chink_and_clink', 'Percussion', 'Mallet_percussion', 'Liquid']\\n\\nclassify the audio multilabels\"}"
      ]
     },
     "execution_count": 72,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Spot-check the first record: answer, audio path, metadata JSON and question.\n",
    "data[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "id": "58144c57",
   "metadata": {},
   "outputs": [],
   "source": [
    "from datasets import Dataset\n",
    "\n",
    "# Turn the list of record dicts into a `datasets.Dataset`.\n",
    "dataset = Dataset.from_list(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "id": "acf78772",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f97a5278b78d492ab4a9061026ffafa0",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "0863715dbcab48768e7eed17b383bf36",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Creating parquet from Arrow format:   0%|          | 0/52 [00:00<?, ?ba/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e4f54a0a0b16495ca832b0b25540d9a2",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Uploading...:   0%|          | 0.00/2.25M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "9d5a0d04e8294561a0cff7f39de8b68d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "README.md:   0%|          | 0.00/2.69k [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "CommitInfo(commit_url='https://huggingface.co/datasets/mesolitica/Zeroshot-Audio-Classification-Instructions/commit/3ca4cf62acbf9fa9940bbba8e65dee33b290b55e', commit_message='Upload dataset', commit_description='', oid='3ca4cf62acbf9fa9940bbba8e65dee33b290b55e', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/mesolitica/Zeroshot-Audio-Classification-Instructions', endpoint='https://huggingface.co', repo_type='dataset', repo_id='mesolitica/Zeroshot-Audio-Classification-Instructions'), pr_revision=None, pr_num=None)"
      ]
     },
     "execution_count": 74,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Publish the records to the Hub dataset repo under the split name 'fsd50k'.\n",
    "dataset.push_to_hub('mesolitica/Zeroshot-Audio-Classification-Instructions', split = 'fsd50k')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "id": "0d38299f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "51197"
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from glob import glob\n",
    "\n",
    "# NOTE: rebinds `audio_files` (earlier the source .wav list) to the converted MP3s.\n",
    "# Sorted so anything built from this list sees the same order on every run;\n",
    "# glob itself returns paths in arbitrary order.\n",
    "audio_files = sorted(glob('FSD50k-audio/*.mp3'))\n",
    "len(audio_files)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "id": "bf571916",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.8G\tFSD50k-audio\r\n"
     ]
    }
   ],
   "source": [
    "# Show the total on-disk size of the converted audio directory.\n",
    "!du -hs FSD50k-audio"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "id": "ddd19c4a",
   "metadata": {},
   "outputs": [],
   "source": [
    "import zipfile\n",
    "\n",
    "# Bundle every listed clip into one deflate-compressed archive, storing each\n",
    "# file under the same relative path it has on disk.\n",
    "archive = zipfile.ZipFile('FSD50k-audio.zip', mode='w', compression=zipfile.ZIP_DEFLATED)\n",
    "with archive:\n",
    "    for mp3_path in audio_files:\n",
    "        archive.write(mp3_path, arcname=mp3_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "id": "d2b2444d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Uploading files using Xet Storage..\n",
      "Uploading...: 100%|████████████████████████| 1.77G/1.77G [00:33<00:00, 53.2MB/s]\n",
      "https://huggingface.co/datasets/mesolitica/Zeroshot-Audio-Classification-Instructions/blob/main/FSD50k-audio.zip\n"
     ]
    }
   ],
   "source": [
    "# Upload FSD50k-audio.zip to the mesolitica/Zeroshot-Audio-Classification-Instructions dataset repo.\n",
    "!huggingface-cli upload mesolitica/Zeroshot-Audio-Classification-Instructions FSD50k-audio.zip \\\n",
    "--repo-type=dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "id": "6fca7043",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cleanup: delete the FSD50k directory and the FSD50k-audio.zip archive.\n",
    "# The FSD50k-audio directory is not touched by this command.\n",
    "!rm -rf FSD50k FSD50k-audio.zip"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
